*! version 5.0
* 13 August 2018
* NIDS
* Master Income do file for Nids Wave 1

* THIS IS 2nd INCOME DO FILE - MERGING DATASETS TOGETHER: 2 OF 7
* THIS DO FILE MERGES TOGETHER THE DATASETS REQUIRED FOR CREATING THE INCOME VARIABLES

*=====================================================================================================================================
* GLOBALS FOR DATA FILES, DO FILES AND VERSION SUFFIXES

* DEFINED IN "W1 Income do file (1 of 7)"



*=====================================================================================================================================

* OPENING THE PROXY, CHILD AND ADULT DATASETS, ADJUSTING WHERE NECESSARY AND APPENDING THE 3 DATASETS

* ---- CHILD file ---------------------------------------------------------------------
* The child interview-month variable is renamed to the adult name so that the two
* columns line up when the files are appended. Child records are flagged proxy = 0.
use "$DataIN\Child_$VersionIN.dta", clear
* Disabled debugging aids kept from the original:
*keep w1_hhid pid  w1_c_intrv_m 
*keep if w1_hhid==103727 | w1_hhid==104919
rename w1_c_intrv_m w1_a_intrv_m
generate proxy = 0
sort w1_hhid
save "$DataOUT\child_append.dta", replace

* ---- PROXY file ---------------------------------------------------------------------
* Only the identifiers and the w1_p_ variables are retained. Variables whose names
* start with w1_p_inc are moved onto the w1_a_inc prefix; proxy records get proxy = 1.
use "$DataIN\Proxy_$VersionIN.dta", clear
keep pid w1_hhid w1_p_*
renpfix w1_p_inc w1_a_inc
generate proxy = 1
sort pid
save "$DataOUT\proxy_append.dta", replace

* ---- ADULT file (becomes the base of the stacked dataset) ---------------------------
use "$DataIN\Adult_$VersionIN.dta", clear
* capture: any error raised by destring on the interview month is ignored
capture destring w1_a_intrv_m, replace
generate proxy = 0
sort pid

* ---- Stack proxy and child records under the adult records --------------------------
append using "$DataOUT\proxy_append.dta"
append using "$DataOUT\child_append.dta"

* The two staging files are no longer needed once appended
erase "$DataOUT\proxy_append.dta"
erase "$DataOUT\child_append.dta"

* Generic No/Yes value label, attached here to the proxy flag
label define dummy 0 "No" 1 "Yes"
label values proxy dummy
label variable proxy "Does the data from this person come from a proxy survey"

* Household order is required by the household-level merge in the next section
sort w1_hhid
save "$DataOUT\data1.dta", replace

*===============================================================================================================================

* Household derived variables: geography type and province

* Stage a household-level lookup containing only the two codes needed here
use "$DataIN\hhderived_$VersionIN.dta", clear
keep w1_hhid w1_geo2011 w1_prov2011
sort w1_hhid
save "$DataOUT\hhderived_temp.dta", replace

* Attach the lookup to every person record (many persons to one household).
* Households that appear only in the lookup are discarded.
use "$DataOUT\data1.dta", clear
sort w1_hhid
merge m:1 w1_hhid using "$DataOUT\hhderived_temp.dta"
drop if _merge == 2
drop _merge
erase "$DataOUT\hhderived_temp.dta"

* Geography type: one 0/1 indicator per code of geo (1, 2, 3).
* Values of geo other than 1, 2 or 3 are copied through unchanged by recode.
rename w1_geo2011 geo

recode geo (1 = 1 "TRADITIONAL") (2 3 = 0), generate(traditional)
recode geo (2 = 1 "URBAN") (1 3 = 0), generate(urban)
recode geo (3 = 1 "FARM") (1 2 = 0), generate(farm)

* Display the indicators with the No/Yes label already attached to proxy in data1
label values traditional urban farm dummy

label variable traditional "Dummy variable for traditional housing"
label variable urban "Dummy variable for urban housing"
label variable farm "Dummy variable for farm housing"

* Province indicators: tabulate creates one dummy per distinct province value, in
* ascending order of the code, and the dummies are then given province names.
* NOTE(review): the renames rely on exactly nine categories appearing in this order - confirm against the data.
rename w1_prov2011 province
quietly tabulate province, generate(province_d)
rename province_d1 westerncape
rename province_d2 easterncape
rename province_d3 northerncape
rename province_d4 freestate
rename province_d5 kwazulunatal
rename province_d6 northwest
rename province_d7 gauteng
rename province_d8 mpumalanga
rename province_d9 limpopo

save "$DataOUT\data2.dta", replace

*===============================================================================================================================

* Bringing in individual derived variables

* Preparing the derived variables: identifiers plus every w1_best variable
use "$DataIN\indderived_$VersionIN.dta", clear
keep pid w1_best* w1_hhid
sort pid

* Number of children per household (under 15 and under 18).
* No residency filter is applied here: every record with a pid in the household counts.
* A record counts as a child only when its age is non-negative and below the cut-off.
* Negative ages are excluded because this file treats negative codes on the w1_best
* variables as missing (see the recode of w1_best_gen below); without the lower bound
* such records would satisfy "age < 15" and inflate the counts.
* System-missing ages compare as larger than any number and so yield 0.
gen ageunder15 = w1_best_age_yrs>=0 & w1_best_age_yrs<15
replace ageunder15=0 if pid==.
gen ageunder18 = w1_best_age_yrs>=0 & w1_best_age_yrs<18
replace ageunder18=0 if pid==.
egen hhchildren=total(ageunder15), by(w1_hhid)
egen hhchildren18=total(ageunder18), by(w1_hhid)
label variable hhchildren "Number of household children (under 15)"
label variable hhchildren18 "Number of household children (under 18)"
drop ageunder15
drop ageunder18

* Gender indicator: code 2 becomes 0, codes -9 to -3 become missing, other values are kept
* (presumably 1 = male in the source coding - confirm against the codebook)
recode w1_best_gen (2=0) (-9/-3=.), gen(best_male)
save "$DataOUT\indderived_temp.dta", replace

* Merging in: person records from data2 are matched on pid.
* Records found only in the derived file are discarded.
use "$DataOUT\data2.dta", clear
sort pid
merge 1:m pid using "$DataOUT\indderived_temp.dta"
drop if _merge == 2
drop _merge
erase "$DataOUT\indderived_temp.dta"
save "$DataOUT\data3.dta", replace

*===============================================================================================================================

* Merging in variables from the household roster

use "$DataIN\HouseholdRoster_$VersionIN.dta", clear
sort pid

* Household size, all listed members: counts roster rows per household whose
* presence code w1_r_pres is not system-missing
egen hhsizem=count(w1_r_pres), by(w1_hhid)
label variable hhsizem "Number of total members - including non-residents"

* Household size, residents only: counts roster rows with w1_r_pres == 1
gen temp = 1 if w1_r_pres == 1
egen hhsizer=count(temp), by(w1_hhid)
label variable hhsizer "Number of household residents"
drop temp

* NOTE: the original script also built roster_male from w1_r_gen at this point, but
* the keep statement below discarded it straight away, so it never reached the saved
* file. That dead step has been removed. If a roster-based gender indicator is needed
* downstream, recreate it here and add it to the keep list.
keep pid w1_r_marstt hhsizem hhsizer  w1_r_pres

* Marital status indicator: codes 1-2 become 1, codes 3-5 become 0, codes -9 to -3 become missing.
* roster_married_d records whether the indicator was observed; missing values of
* roster_married are then set to 0 so that the pair can be used together.
recode w1_r_marstt (-9/-3=.) (1/2=1) (3/5=0), gen(roster_married)
gen roster_married_d=roster_married!=.
replace roster_married=0 if roster_married==.
rename w1_r_marstt roster_mstatus
label variable roster_married "Married or living with partner"

* Keep resident roster rows only (w1_r_pres == 1)
drop if w1_r_pres!=1
sort pid
save "$DataOUT\roster_data.dta", replace

* Attach the roster variables to the person records; roster-only rows are discarded
use "$DataOUT\data3.dta", clear
sort pid
merge 1:m pid using "$DataOUT\roster_data.dta"
drop if _merge == 2
drop _merge
save "$DataOUT\data4.dta", replace
erase "$DataOUT\roster_data.dta"

*===============================================================================================================================

* Merging in variables from the household questionnaire

use "$DataIN\HHQuestionnaire_$VersionIN.dta", clear

* Household level income
* hhq_inc    : reported amount, with codes -9 to -3 set to missing
* hhq_inc_ib : point value assigned to each bracket code 1-15 of w1_h_tinc_show
*              (presumably bracket midpoints - confirm); all other codes become missing
* hhq_incb   : reported amount where available, otherwise the bracket value
recode w1_h_tinc (-9/-3=.), gen(hhq_inc)
label variable hhq_inc "Household income as per household questionnaire "
recode w1_h_tinc_show (-9/0=.) (1=0) (2=100) (3=350) (4=750) (5=1250) (6=2000) (7=3000) (8=4000) (9=5250) (10=7000) (11=9500) (12=13500) (13=23000) (14=40000) (15=75000) (16/max=.), gen(hhq_inc_ib)
gen hhq_incb=hhq_inc
replace hhq_incb=hhq_inc_ib if hhq_inc_ib!=. & hhq_incb==.
label variable hhq_incb "Household income as per household questionnaire incl brackets and imputations"

* Type of housing situation
* The assignments run in sequence, so when several conditions hold for one household
* the last matching line wins. Anything still unset is coded 5 ("Missing").
gen homestatus=.
replace homestatus=1 if w1_h_rnt==1
replace homestatus=2 if w1_h_ownpaid==2
replace homestatus=3 if w1_h_ownpaid==1
replace homestatus=4 if w1_h_rnt==2
replace homestatus=5 if homestatus==.
label define homestatus 1 "Renting" 2 "Homeowners with mortgage" 3 "Homeowners no mortgage" 4 "Don't own or rent" 5 "Missing"
label values homestatus homestatus
* mortgage is 1 for homestatus 2 and 0 for every other (non-missing) status
gen mortgage=1 if homestatus==2
replace mortgage=0 if homestatus!=2 & homestatus!=.

* Housing expenses
* Codes -9 to -3 and zero become missing; for the two hypothetical-rent questions
* values of 50000 and above are also set to missing. Logs are taken of what remains.
recode w1_h_rntpay (-9/-3=.) (0=.), gen(rent)
gen lnrent=ln(rent)
recode w1_h_rntpot (-9/-3=.) (50000/max=.) (0=.), gen(rent_would)
gen lnrent_would=ln(rent_would)
recode w1_h_ownrnt (-9/-3=.) (50000/max=.) (0=.), gen(rent_could)
gen lnrent_could=ln(rent_could)

* Household characteristic variables
* Each characteristic X gets a companion X_d that is 1 when X was observed and 0
* otherwise; missing values of X itself are then replaced by 0.

* Dwelling type (source code 5 is folded into 3, source code 10 into 11)
recode w1_h_dwltyp (-9/-3=.) (1=1 "Dwelling/house or brick structure") (2=2 "Traditional dwelling/hut/structure") (3=3 "Flat or apartment") (4=4 "Town/cluster/semi-detached house") (5=3) (6=6 "Dwelling/house/flat/room in backyard") (7=7 "Informal dwelling/shack in backyard") (8=8 "Informal dwelling/shack not in backyard") (9=9 "Room/flatlet ") (10=11) (11=11 "Other"), gen(hometype)
label variable hometype "Type of house"
gen hometype_d=hometype!=.
replace hometype=0 if hometype==.

* Number of rooms (the value 35 is set to missing), plus its square
recode w1_h_dwlrms (-9/-3=.) (35=.), gen(homerooms) 
label variable homerooms "Number of rooms in the house occupied by the household"
gen homerooms_d=homerooms!=.
replace homerooms=0 if homerooms==.
gen homeroomssq=homerooms^2

* Roof material, collapsed to seven categories
recode w1_h_dwlmatroof (-9/-3=.) (1=1 "Bricks or cement") (2=1) (3=3 "Corrugated iron/zinc") (4/8=7 "Other") (9=4 "Tile") (10=7 "Other") (11=5 "Thatching") (12=6 "Asbestos/cement roof sheeting") (13=7), gen(homeroof)
label variable homeroof "Material used in the roof of the house"
gen homeroof_d=homeroof!=.
replace homeroof=0 if homeroof==.

* Wall material, collapsed to six categories
recode w1_h_dwlmatrwll (-9/-3=.) (1=1 "Bricks") (2=2 "Cement block/concrete") (3=3 "Corrugated iron/zinc") (7=4 "Mixture of mud and cement") (10=5 "Mud bricks") (4/6=6 "Other") (8/9=6) (11/13=6), gen(homewalls)
* Fix: the walls label was previously applied to homeroof, which overwrote the roof
* label and left homewalls without a variable label
label variable homewalls "Material used in the walls of the house"
gen homewalls_d=homewalls!=.
replace homewalls=0 if homewalls==.

* Keeping and saving the household-level variables needed by the merge
keep w1_hhid hometype homerooms homeroomssq homeroof homewalls hometype_d homerooms_d homeroof_d homewalls_d hhq_inc hhq_inc_ib hhq_incb w1_h_tinc_show w1_h_tinc rent_would lnrent_would rent_could lnrent_could rent lnrent homestatus mortgage
sort w1_hhid
save "$DataOUT\hhquestionmerge.dta", replace

* Attach the household variables to every person record (many persons to one
* household); households with no person record are discarded
use "$DataOUT\data4.dta", clear
sort w1_hhid
merge m:1 w1_hhid using "$DataOUT\hhquestionmerge.dta"
drop if _merge == 2
drop _merge
sort pid
save "$DataOUT\data.dta", replace

*===============================================================================================================================

* Housekeeping: delete the intermediate files written by this do file.
* Only data.dta, the final merged dataset, is left in the output folder.
erase "$DataOUT\data1.dta"
erase "$DataOUT\data2.dta"
erase "$DataOUT\data3.dta"
erase "$DataOUT\data4.dta"
erase "$DataOUT\hhquestionmerge.dta"

* end of do file 

*===============================================================================================================================
